rm(list = ls())
library(sf)        
library(osmdata)    
library(ggplot2)    
library(tidyverse) 
library(readxl)
library(haven)

# Load Data ----------------------------------------------------------


dat0 <- read_csv("data/raw_data/mosul_survey.csv")

# merge in buffer damage (pre-processed based on respondent coordinates)
# UNITAR - UNOSAT
damage <- read_csv("data/processed_data/damage_buffer.csv")
dat0 <- merge(dat0, damage[c("SbjNum", "damage_in_10m", "damage_in_50m", "damage_in_100m", "damage_in_500m", "damage_in_1km")], by = "SbjNum")


# Fix people who moved ----------------------------------------------------------

# Ensure respondents:  
# (1) Lived in Mosul during the battle  
# (2) Did not move neighborhoods after the battle  
# (3) Have treatment and covariates that reflect these conditions  

# Remove respondents who did not live in Mosul during the campaign  
# [Q12_1] "Were you living in Mosul for any part of the military campaign to expel Daesh?"  
# 1 = Yes, 0 = No  
# This restricts the sample to 1,282 respondents  
dat1 <- dat0 %>%
  mutate(sample_1 = ifelse(!Q12_1 %in% 0, 1, 0)) %>%
  filter(sample_1 == 1)

# Fix issue with respondents who lived in a different house during the battle  
# [Q12_2] "Were you living in this house/apartment during the battle?"  
# 1 = This house, 2 = Different house  
table(dat1$Q12_2)  # 858 stayed, 152 moved, some unknown  

# [Q12_3] If different house, which neighborhood were you in during the battle?  
# Replace invalid neighborhood codes with NA  
dat1$Q12_3 <- ifelse(dat1$Q12_3 %in% c(96, 208), NA, dat1$Q12_3)  
# Correct misclassified neighborhoods  
dat1$Q12_3 <- recode(dat1$Q12_3, `183` = 182, `185` = 184, `190` = 189)  
# Update neighborhood for those who moved  
dat1$neighborhood_indicator <- ifelse(dat1$Q12_2 == 2, dat1$Q12_3, dat1$Neighborhood)  

# Replace damage values for those who moved using neighborhood-level means  
dat1 <- dat1 %>% 
  group_by(neighborhood_indicator) %>% 
  mutate(mean_damage_in_10m = mean(damage_in_10m), 
         mean_damage_in_50m = mean(damage_in_50m),
         mean_damage_in_100m = mean(damage_in_100m))
         
dat1$damage_in_10m <- ifelse(dat1$Q12_2 == 2, dat1$mean_damage_in_10m, dat1$damage_in_10m)
dat1$damage_in_50m <- ifelse(dat1$Q12_2 == 2, dat1$mean_damage_in_50m, dat1$damage_in_50m)
dat1$damage_in_100m <- ifelse(dat1$Q12_2 == 2, dat1$mean_damage_in_100m, dat1$damage_in_100m)


# Define Treatment ----------------------------------------------------------
 
# Load neighborhood location data (East/West of river)
EastWest_Indicated <- read_csv("data/raw_data/neighborhood_side.csv") %>%
  mutate(West_Mosul = ifelse(location == "West", 1, 0)) %>%
  select(neighborhood_indicator, West_Mosul)

# merge dataset with neighborhood into dataset 
dat2 <- merge(dat1, EastWest_Indicated, by = "neighborhood_indicator", all.x = T)

# merge additional neighborhood indicator into dataset 
id <- read_csv("data/raw_data/neighborhood_indicator.csv")
dat2 <- merge(dat2, id, by = "SbjNum", all.x = T)

# Assign treatment based on West_Mosul indicator
dat2 <- dat2 %>%
  mutate(treated = West_Mosul)

# Merge in neighborhood level covariates 
Neighborhood <- as.data.frame(st_read("data/raw_data/UN_Habitat_Mosul_Neighborhoods/Neighborhood.shp"))
Neighborhood <- Neighborhood %>% 
  select(c("OBJECTID", "POPULATION", "RES_UNITS", "Shape__Are")) %>% 
  rename(neighborhood_indicator2 = OBJECTID, 
         area = Shape__Are)
dat2 <- left_join(dat2, Neighborhood, by = "neighborhood_indicator2")
# Normalize population and residential units by area 
dat2$pop_density <- dat2$POPULATION/ dat2$area
dat2$res_density <- dat2$RES_UNITS/ dat2$area

# Calculate road density using data from of OpenStreetMap; takes about 30 minutes for loop to run 
# Mosul <- getbb("Mosul Iraq")
# Mosul[1,] <- c(43.05, 43.24)
# Mosul[2,] <- c(36.29, 36.41)
# streets <- Mosul%>%
#   opq()%>%
#   add_osm_feature(key = "highway") %>%
#   osmdata_sf()
# st_crs(streets$osm_lines) <- 4326
# Neighborhood <- st_read("data/raw_data/UN_Habitat_Mosul_Neighborhoods/Neighborhood.shp")
# Mosul <- st_union(Neighborhood$geometry)
# Mosul <- st_as_sf(Neighborhood)
# Mosul <- st_transform(Neighborhood, crs = 4326)
# plot(Mosul$geometry)
# 
# Mosul$poly_area <- st_area(Mosul$geometry)
# Mosul$road_length <- NA
# 
# for (i in 1:length(Mosul$OBJECTID)) {
#   print(i)
#   neighboorhood <- Mosul[Mosul$OBJECTID %in% i,]
#   # intersection of neighboorhood and roads
#   neighboorhood_streets <- st_intersection(neighboorhood, streets$osm_lines$geometry)
#   # length of roads in neighboorhood
#   Mosul$road_length[i] <- sum(st_length(neighboorhood_streets$geometry))
# }
# 
# Mosul$road_density <- as.numeric(Mosul$road_length / Mosul$poly_area)
# Mosul <- Mosul %>%
#   select(c(OBJECTID,road_density)) %>%
#   rename(neighborhood_indicator2 = OBJECTID)
# write_csv(Mosul, "data/processed_data/road_density.csv")

# instead just load pre-processed data 
road_density <- read_csv("data/processed_data/road_density.csv")
dat2 <- left_join(dat2, road_density, by = "neighborhood_indicator2")


# Clean outcome space of variables ----------------------------------------------------------
dat3 <- dat2

# gender: [Q13_1] 
dat3$gender <- ifelse(dat3$Q13_1 == 96, NA, dat3$Q13_1)

# age: [Q8_1] "How old are you?"
dat3$age <- ifelse(dat3$Q8_1 %in% c(98,96,99), NA, dat3$Q8_1) 

# income:[Q13_7] "Which of these statements comes closest to describing your household 
#income immediately before Daesh captured Mosul?"
dat3$income_pre_IS <- ifelse(dat3$Q13_7 %in% c(98,96,99), NA, dat3$Q13_7) 
dat3$income_pre_IS_1 <- as.integer(dat3$income_pre_IS == 1)
dat3$income_pre_IS_2 <- as.integer(dat3$income_pre_IS == 2)
dat3$income_pre_IS_3 <- as.integer(dat3$income_pre_IS == 3)
dat3$income_pre_IS_4 <- as.integer(dat3$income_pre_IS == 4)
dat3$income_pre_IS_3_4 <- as.integer(dat3$income_pre_IS > 2)


# identity: [Q13_15] "Which of the following best describes you? Above all, I am …”
dat3$identity <- ifelse(dat3$Q13_15 %in% c(98,96,99), NA, dat3$Q13_15) 
dat3$identity_1 <- as.integer(dat3$identity == 1)
dat3$identity_2 <- as.integer(dat3$identity == 2)
dat3$identity_3 <- as.integer(dat3$identity == 3)
dat3$identity_4 <- as.integer(dat3$identity == 4)
dat3$identity_5 <- as.integer(dat3$identity == 5)
dat3$identity_6 <- as.integer(dat3$identity == 6)
dat3$identity_7 <- as.integer(dat3$identity == 7)


# education: [Q8_2] "What is the highest level of education you have completed?"
dat3$education <- ifelse(dat3$Q8_2 %in% c(98,96,99), NA, dat3$Q8_2) 
dat3$education_1 <- as.integer(dat3$education == 1)
dat3$education_2 <- as.integer(dat3$education == 2)
dat3$education_3 <- as.integer(dat3$education == 3)
dat3$education_4 <- as.integer(dat3$education == 4)
dat3$education_5 <- as.integer(dat3$education == 5)
dat3$education_6 <- as.integer(dat3$education == 6)
dat3$education_7 <- as.integer(dat3$education == 7)
dat3$education_4_5_6_7 <- as.integer(dat3$education > 3)

# vote: [Q13_12] "Did you vote in the last parliamentary election in 2014?"
dat3$vote <- ifelse(dat3$Q13_12 %in% c(98,96,99), NA, dat3$Q13_12)

# sharia [Q9_6] Do you believe that Iraqi state law should be reformed to include more Sharia, less Sharia, or stay the same as it is now?
dat3$sharia <- ifelse(dat3$Q9_6 %in% c(98,96,99), NA, dat3$Q9_6) 
dat3$sharia_1 <- as.integer(dat3$sharia == 1)
dat3$sharia_2 <- as.integer(dat3$sharia == 2)
dat3$sharia_3 <- as.integer(dat3$sharia == 3)

# prayer: [Q13_10] How often do you attend Friday prayer?
dat3$prayer <- ifelse(dat3$Q13_10 %in% c(98,96,99), NA, dat3$Q13_10) 
dat3$prayer_1 <- as.integer(dat3$prayer == 1)
dat3$prayer_2 <- as.integer(dat3$prayer == 2)
dat3$prayer_3 <- as.integer(dat3$prayer == 3)
dat3$prayer_4 <- as.integer(dat3$prayer == 4)
dat3$prayer_5 <- as.integer(dat3$prayer == 5)
dat3$prayer_4_5 <- as.integer(dat3$prayer > 3)

## Grievances with the Iraqi government (considering multiple related questions)

# "During the years 2006- 2014... 
# [Q10_10] ... did you ever feel disrespected or insulted by an Iraqi police officer?"
dat3$gov_grievance_police <- ifelse(dat3$Q10_10 %in% c(98, 96, 99), NA, dat3$Q10_10)
# [Q10_11] "...were you ever arrested?"
dat3$gov_grievance_arrested <- ifelse(dat3$Q10_11 %in% c(98, 96, 99), NA, dat3$Q10_11)
# [Q10_12] "...did you ever feel that you
# were discriminated against by an Iraqi government employee (either
# military or civilian) for being Sunni?"
dat3$gov_grievance_sunni <- ifelse(dat3$Q10_12 %in% c(98, 96, 99), NA, dat3$Q10_12)
# [Q10_13] "...did you ever participate in a demonstration or protest directed at
# the Iraqi government?"
dat3$gov_grievance_protest <- ifelse(dat3$Q10_13 %in% c(98, 96, 99), NA, dat3$Q10_13)

# Combine grievances into one variable: 'gov_grievances'
dat3$gov_grievances <- rowSums(dat3[, c("gov_grievance_police", "gov_grievance_arrested", "gov_grievance_sunni", "gov_grievance_protest")], na.rm = TRUE)
# Handle missing values: If all grievance indicators are NA, set 'gov_grievances' to NA
dat3$gov_grievances <- ifelse(rowSums(is.na(dat3[, c("gov_grievance_police", "gov_grievance_arrested", "gov_grievance_sunni", "gov_grievance_protest")])) == 4, NA, dat3$gov_grievances)


## Harmed during Battle of Mosul (considering multiple related questions)
# Did your household experience any of the following harms during the battle for Mosul?
# house_damage: [Q12_4_1] Was the house or apartment that you were living in during the battle seriously damaged? 
dat3$house_damage <- ifelse(dat3$Q12_4_1 %in% c(98,96,99), NA, dat3$Q12_4_1) 
# hh_injured: [Q12_4_3] Was a member of your household injured?
dat3$hh_injured <- ifelse(dat3$Q12_4_3 %in% c(98,96,99), NA, dat3$Q12_4_3) 
# hh_killed: [Q12_4_3] Was a member of your household killed?
dat3$hh_killed <- ifelse(dat3$Q12_4_4 %in% c(98,96,99), NA, dat3$Q12_4_4) 
# Combine household harm into one variable: 'death_or_injury'
dat3$death_or_injury <- ifelse(dat3$hh_injured%in% 1| dat3$hh_killed%in% 1 , 1, 0)
dat3$death_or_injury <- ifelse(is.na(dat3$hh_injured) & is.na(dat3$hh_killed), NA, dat3$death_or_injury)
# Create binary version of satellite-detected damage
dat3$damage_in_10m_binary <- ifelse(dat3$damage_in_10m > 0, 1, dat3$damage_in_10m)

## Harmed by IS (considering multiple related questions)
# [Q11_21_1] "Was the house or apartment that you were living in during the time that Daesh was in
# control of Mosul seriously damaged?"
dat3$IS_house_damage<- ifelse(dat3$Q11_21_1 %in% c(98,96,99), NA, dat3$Q11_21_1) 
# Q11_21_2] "Was the house or apartment that you were living in
# during the time that Daesh was in control of Mosul confiscated by Daesh?"
dat3$IS_house_confiscated<- ifelse(dat3$Q11_21_2 %in% c(98,96,99), NA, dat3$Q11_21_2) 
# [Q11_21_3] "Was a member of your household injured?"
dat3$IS_injured<- ifelse(dat3$Q11_21_3 %in% c(98,96,99), NA, dat3$Q11_21_3) 
# [Q11_21_4] "Was a member of this household killed?"
dat3$IS_killed<- ifelse(dat3$Q11_21_4 %in% c(98,96,99), NA, dat3$Q11_21_4) 
# Combine harm into one variable: 'IS_any_harm'
dat3$IS_any_harm <- ifelse(dat3$IS_house_damage %in% 1 | dat3$IS_house_confiscated %in% 1 | dat3$IS_injured %in% 1
                                | dat3$IS_killed %in% 1, 1, 0)
# Handle missing values: If all harm indicators are NA, set 'IS_any_harm' to NA
dat3$IS_any_harm <- ifelse(is.na(dat3$IS_house_damage)&  is.na(dat3$IS_house_confiscated) & is.na(dat3$IS_injured) &
                                  is.na(dat3$IS_killed) , NA, dat3$IS_any_harm)
# Combine blame for harm into one variable: 'IS_blame'
dat3$IS_blame <- ifelse(dat3$Q11_22 %in% 1| dat3$Q11_23 %in% 1|dat3$Q11_24 %in% 1, 1, 0)
# Handle missing values: If all harm indicators are NA, set 'IS_blame_IS' to NA
dat3$IS_blame <- ifelse(is.na(dat3$Q11_22)&  is.na(dat3$Q11_23) & is.na(dat3$Q11_24), NA, dat3$IS_blame)

## IS Service provisions (considering multiple related questions)

# "During the first six months of Daesh rule, did Daesh collect any of the following 
# types of taxes and fees from this household?..."
# [Q11_16_1] Electricity fees
dat3$IS_electric <- ifelse(dat3$Q11_16_1 %in% c(98,96,99), NA, dat3$Q11_16_1) 
# [Q11_16_2] Water fees
dat3$IS_water<- ifelse(dat3$Q11_16_2 %in% c(98,96,99), NA, dat3$Q11_16_2) 
# [Q11_16_3] Zakat 
dat3$IS_zakat <- ifelse(dat3$Q11_16_3 %in% c(98,96,99), NA, dat3$Q11_16_3) 

# Combine service provisions into one variable: 'IS_services'
dat3$IS_services <- dat3$IS_electric + dat3$IS_water + dat3$IS_zakat
# Handle missing values: If all service indicators are NA, set 'IS_services' to NA
dat3$IS_services <- ifelse(is.na(dat3$IS_electric)&  is.na(dat3$IS_water) & is.na(dat3$IS_zakat), NA, dat3$IS_services)

# # Military Legitimacy  (considering multipleactors)
# "In your opinion, how likely are the following actors to kill innocent civilians?"
# [Q12_8_1] United States
dat3$us_legitimacy <- ifelse(dat3$Q12_8_1 %in% c(98,96,99), NA, dat3$Q12_8_1)
# [Q12_8_2] Iraqi Counter-Terrorism Service
dat3$cts_legitimacy <- ifelse(dat3$Q12_8_2 %in% c(98,96,99), NA, dat3$Q12_8_2)
# [Q12_8_3] Iraqi Army (Regular)
dat3$army_legitimacy <- ifelse(dat3$Q12_8_3 %in% c(98,96,99), NA, dat3$Q12_8_3)
# [Q12_8_4] Federarl Police
dat3$police_legitimacy <- ifelse(dat3$Q12_8_4 %in% c(98,96,99), NA, dat3$Q12_8_4)
# [Q12_8_5] PMF
dat3$pmf_legitimacy <- ifelse(dat3$Q12_8_5 %in% c(98,96,99), NA, dat3$Q12_8_5)

# reverse code the outcome space; perception that actor is less likely to kill civilians = higher legitimacy
dat3 <-
  dat3 |>
  mutate(
    us_legitimacy = case_match(
      us_legitimacy,
      4 ~ 1,
      3 ~ 2,
      2 ~ 3,
      1~ 4
    ), 
    cts_legitimacy = case_match(
      cts_legitimacy,
      4 ~ 1,
      3 ~ 2,
      2 ~ 3,
      1~ 4
    ), 
    army_legitimacy = case_match(
      army_legitimacy,
      4 ~ 1,
      3 ~ 2,
      2 ~ 3,
      1~ 4
    ), 
    police_legitimacy = case_match(
      police_legitimacy,
      4 ~ 1,
      3 ~ 2,
      2 ~ 3,
      1~ 4
    ), 
    pmf_legitimacy = case_match(
      pmf_legitimacy,
      4 ~ 1,
      3 ~ 2,
      2 ~ 3,
      1~ 4
    )
  )


# Combine legitimacy into one variable by taking mean across actors: 'military_legitimacy'
selected_columns <- dat3 %>% select(cts_legitimacy, police_legitimacy, army_legitimacy)
dat3$military_legitimacy <- rowMeans(selected_columns, na.rm = F)


## Looting(considering multiple actors)

# "Have you witnessed or heard about cases in which the following forces have stolen property or money
# (looting) from civilians in Mosul?"
# [Q12_9_1] Iraqi Counter-Terrorism
dat3$Q12_9_1 <- ifelse(dat3$Q12_9_1 %in% c(98,96,99), NA, dat3$Q12_9_1)
# [Q12_9_2] Iraqi Army (Regular)
dat3$Q12_9_2 <- ifelse(dat3$Q12_9_2 %in% c(98,96,99), NA, dat3$Q12_9_2) 
# [Q12_9_3] Iraqi Federal Police
dat3$Q12_9_3 <- ifelse(dat3$Q12_9_3 %in% c(98,96,99), NA, dat3$Q12_9_3) 

# Combine looting into one variable: 'looting'
dat3$looting <- ifelse(dat3$Q12_9_1%in% 1|dat3$Q12_9_2%in%1| dat3$Q12_9_3%in% 1, 1, 0)

## "Intermediate Variables"
# daesh_govern: [Q11_11] "During the first six months of Daesh rule, did you believe that Daesh 
#was doing a better job of governing Mosul than the Iraqi government did previously?"
dat3$daesh_govern <- ifelse(dat3$Q11_11 %in% c(98,96,99), NA, dat3$Q11_11)
# daesh_corrupt: [Q11_12] "During the first six months of Daesh rule, to what extent did you think
# that Daesh was corrupt?"
dat3$daesh_corrupt <- ifelse(dat3$Q11_12 %in% c(98,96,99), NA, dat3$Q11_12)
# daesh_taxes: [Q11_17] "How much do you agree or disagree with the following statement?
#`The taxes and fees collected by Daesh were fair in exchange for the services that Daesh was providing.`''
dat3$daesh_taxes <- ifelse(dat3$Q11_17 %in% c(98,96,99), NA, dat3$Q11_17)
# daesh_school: [Q11_20] "Did any of the children in this household attend schools in Mosul
#while Daesh controlled the city?"
dat3$daesh_school <- ifelse(dat3$Q11_20 %in% c(98,96,99), NA, dat3$Q11_20)
# corrupt_current: [Q10_4] "In general, to what extent do you think that the current Iraqi government is corrupt?"
dat3$corrupt_current <- ifelse(dat3$Q10_4 %in% c(98,96,99), NA, dat3$Q10_4)


# Subset and Export ----------------------------------------------------------

# subset variables for analysis
dat4 <- dat3 %>%
  select(SbjNum, Neighborhood, 
         gender, age, 
         income_pre_IS, income_pre_IS_1, income_pre_IS_2, income_pre_IS_3, income_pre_IS_4, income_pre_IS_3_4,
         identity, identity_1, identity_2,identity_3, identity_4, identity_5, identity_6, identity_7,
         education, education_1, education_2, education_3, education_4, education_5, education_6, education_7, education_4_5_6_7, 
         vote, 
         treated, 
         cts_legitimacy, police_legitimacy, army_legitimacy, military_legitimacy, us_legitimacy, pmf_legitimacy,  
         house_damage, hh_injured, hh_killed, death_or_injury, 
         damage_in_10m, damage_in_10m_binary, damage_in_50m, damage_in_100m,damage_in_500m,
         gov_grievances, gov_grievance_police, gov_grievance_arrested, gov_grievance_sunni, gov_grievance_protest, 
         IS_any_harm, IS_electric, IS_water, IS_zakat, 
         sharia, sharia_1, sharia_2, sharia_3, 
         prayer, prayer_1, prayer_2, prayer_3, prayer_4, prayer_5, prayer_4_5,
         daesh_govern, daesh_corrupt, daesh_taxes, corrupt_current, 
         looting, 
         pop_density, road_density, res_density)

write_dta(dat4, "data/clean_survey.dta")

# Clean Health Survey (Lafta et al. 2018)  ---------------------------------------------------------
data <-read_excel("data/raw_data/mosul_deaths_injuries_min_dataset_21jan2018.xls")

# limit the sample to Death and Injury reported during the battle of mosul 
data <- data %>% 
  filter(!is.na(personmonths_lib))

# create cause of death variables 
# any
data$death <- ifelse(data$death_violent_event %in% "Yes", 1,0)
# airstrike 
data$death_airstrike <- ifelse(data$death_cause_conflict %in% "Airstrike", 1,0)
# explosion 
data$death_explosions <- ifelse(data$death_cause_conflict %in% "Other explosions", 1,0)
# gunshot 
data$death_gunshot <- ifelse(data$death_cause_conflict %in% "Gunshot", 1,0)
# car bomb
data$death_carbomb <- ifelse(data$death_cause_conflict %in% "Car bomb" , 1,0)
# other intentional 
data$death_other_conflict <- ifelse(data$death_cause_conflict %in% "Other conflict-related injury" , 1,0)

# export 
export <- data %>% 
  select(side, death, death_airstrike, death_explosions, death_gunshot, death_carbomb, death_other_conflict)
write_dta(export, "data/processed_data/health_survey.dta")


